# -*- coding: utf-8 -*-
import scrapy
import re
from scrapy.selector import Selector
from scrapy.http import Request
from Movies.items import MoviesItem


class Dytt8Spider(scrapy.Spider):
    """Spider that walks the dytt8 movie listing pages and scrapes each movie page.

    ``parse`` handles listing pages: it schedules one request per movie detail
    link and schedules the follow-up listing pages.  ``parse_items`` handles a
    movie detail page and fills a ``MoviesItem`` using regular expressions
    applied to the raw HTML of the ``#Zoom`` node.
    """

    name = 'dytt8'
    allowed_domains = ['dytt8.net']
    home_url = "http://www.dytt8.net"
    list_page_path = "/html/gndy/dyzz/list_23_%s.html"
    start_urls = [home_url + list_page_path % 1]

    # Highest listing page number to request (page 1 comes from start_urls).
    # Subclasses or instances may raise it to crawl further.
    last_page = 2

    # (item field, regex) pairs applied to the HTML of the "#Zoom" node.
    # Every regex captures the text between a "◎" label and the next "<br".
    # NOTE(review): "movie_language" is filled from the "◎类　　别" label, which
    # looks like a category/genre label rather than a language one -- confirm
    # against the MoviesItem schema before changing it.
    # NOTE(review): "movie_actors" stops at the first "<br>", so if the page
    # lists one actor per line only the first one is captured -- confirm intent.
    _info_patterns = (
        ("movie_release_year", u"◎年　　代(.*?)<br"),
        ("movie_release_year_more", u"◎上映日期(.*?)<br"),
        ("movie_place", u"◎产　　地(.*?)<br"),
        ("movie_language", u"◎类　　别(.*?)<br"),
        ("movie_duration", u"◎片　　长(.*?)<br"),
        ("movie_score", u"◎豆瓣评分(.*?)<br"),
        ("movie_guide", u"◎导　　演(.*?)<br"),
        ("movie_actors", u"◎主　　演(.*?)<br>"),
        ("movie_download_url_ftp", r'<a .*?>(ftp.*?)</a>'),
    )

    def parse(self, response):
        """Yield requests for every movie linked from a listing page.

        Also yields requests for listing pages 2..``last_page`` with this
        method as the callback.

        :param response: the downloaded listing page.
        :return: generator of ``Request`` objects.
        """
        hrefs = response.xpath('//table//tr[2]/td[2]/b/a/@href').extract()
        for href in hrefs:
            # urljoin resolves relative hrefs and leaves absolute ones intact,
            # unlike plain string concatenation with home_url.
            yield Request(response.urljoin(href), callback=self.parse_items)

        for page_number in range(2, self.last_page + 1):
            next_page = self.home_url + self.list_page_path % page_number
            yield Request(next_page, callback=self.parse)

    def parse_items(self, response):
        """Build a ``MoviesItem`` from a movie detail page.

        Each field is stored as the list returned by ``re.findall`` (possibly
        empty).  A page without a title or without the ``#Zoom`` node produces
        empty lists instead of raising.

        :param response: the downloaded movie detail page.
        :return: the populated ``MoviesItem``.
        """
        item = MoviesItem()

        summary = response.xpath('//h1/font/text()').extract()
        item["movie_summary"] = summary
        # Guard the index: the title node may be absent on unexpected pages.
        item["movie_name"] = re.findall(u"《(.*?)》", summary[0]) if summary else []

        movie_info = response.xpath('//*[@id="Zoom"]/td').extract_first()
        if movie_info is None:
            # re.findall rejects None; an empty string makes every field [].
            self.logger.warning("No #Zoom content found on %s", response.url)
            movie_info = u""

        for field, pattern in self._info_patterns:
            item[field] = re.findall(pattern, movie_info)

        return item






